This file provides code to analyse a subset of a random 1/3 of participants for each lab.

1 Load Data

# Load analysis packages. Lines beginning with ## are output transcripts
# from the knitted document and are preserved as-is.
library(psych) # for SPSS-style PCA
library(paran) # for parallel analyses
## Loading required package: MASS
library(GPArotation) # for robustness checks
library(kableExtra) # for nice tables
library(tidyverse) # for data cleaning
## ── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.3
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()      masks psych::%+%()
## ✖ ggplot2::alpha()    masks psych::alpha()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::select()     masks MASS::select()
# create directory for saving figures
if (!dir.exists("figures")) { dir.create("figures") }

# show NA cells as blank in kable tables
options(knitr.kable.NA = '')
knitr::opts_chunk$set(echo = TRUE, 
                      warning = FALSE, 
                      message = FALSE)

# record the R version used to knit this document
R.version.string
## [1] "R version 3.6.0 (2019-04-26)"
# fixed seed so the random 1/3 subset drawn below is reproducible
set.seed(8675309)

1.1 Simulate Study Data (for Stage 1 RR)

See https://osf.io/87rbg/ for Stage 1 RR code. The code below is modified from the original to account for a different raw data structure and to add additional tables and graphs. All analysis code is identical.

1.2 Load Study Data (SUBSET)

Subset the data (this won’t run unless you have the original full data).

# Read the full raw data (not distributed; this chunk only runs with the
# original full dataset).
session <- read_csv("data/session.csv")
dat_quest <- read_csv("data/quest_data.csv")
dat_exp <- read_csv("data/exp_data.csv")

# Draw a random 1/3 of real (guest/registered) participants within each lab
# (proj_name). NOTE: sample_frac() consumes the RNG stream, so the subset is
# only reproducible if set.seed() above runs immediately before this chunk.
session_subset <- session %>%
  filter(user_status %in% c("guest", "registered")) %>%
  group_by(proj_name) %>%
  sample_frac(1/3) %>%
  ungroup() %>%
  pull(session_id)

# Write the subsetted session, experiment, and questionnaire files to disk.
session %>%
  filter(session_id %in% session_subset) %>%
  write_csv("data/session_subset.csv")

dat_exp %>%
  filter(session_id %in% session_subset) %>%
  write_csv("data/exp_data_subset.csv")

dat_quest %>%
  filter(session_id %in% session_subset) %>%
  write_csv("data/quest_data_subset.csv")

Load study data and demographic questionnaires from the data folder.

# Load the pre-subsetted data written by the chunk above.
session <- read_csv("data/session_subset.csv")
dat_quest <- read_csv("data/quest_data_subset.csv")
dat_exp <- read_csv("data/exp_data_subset.csv")

# reshape questionnaire data to make wide
# (one row per session/user, one column per question; if a question was
# answered more than once, keep only the earliest answer by endtime)
quest <- dat_quest %>%
  select(session_id, endtime, user_id, q_name, dv) %>%
  group_by(session_id, user_id, q_name) %>%
  arrange(endtime) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  spread(q_name, dv, convert = TRUE)

Join experiment and questionnaire data

# Join trial-level experiment data with session info and questionnaires,
# parse language/lab identifiers out of the experiment and project names,
# and expand the 3-letter trait abbreviations to full trait names.
ratings_raw <-  dat_exp %>%
  left_join(session, by = c("user_id", "session_id")) %>%
  filter(user_status %in% c("guest", "registered")) %>%
  # exp_name has the form "psa_LANGUAGE_TRAIT_BLOCK"
  separate(exp_name, c("psa", "language", "trait", "block"), 
           sep = "_") %>%
  select(-psa) %>%
  # proj_name has the form "psa_LANG_LAB1[_LAB2]"; fill = "right" pads a
  # missing second lab component with NA
  separate(proj_name, c("psa", "lang", "lab1", "lab2"),
           sep = "_", fill = "right") %>%
  filter(lab1 != "test") %>%
  unite(lab_id, c("lab1", "lab2")) %>%
  # drop both helper columns; the original `select(-psa, lang)` kept lang
  # (it was only discarded by the final select below)
  select(-psa, -lang) %>%
  left_join(quest, by = c("session_id", "user_id")) %>%
  # NB: session_id becomes the participant identifier (`user_id`) from here on
  select(language, user_id = session_id, trait, 
         stim_id = trial_name, 
         order, rt, rating = dv,
         country, sex, age, ethnicity, lab = lab_id, block) %>%
  mutate(trait = recode(trait,
                        "Res" = "responsible",
                        "Wei" = "weird",
                        "Old" = "old",
                        "Tru" = "trustworthy",
                        "Dom" = "dominant",
                        "Emo" = "emostable",
                        "Agg" = "aggressive",
                        "Car" = "caring",
                        "Int" = "intelligent",
                        "Unh" = "unhappy",
                        "Soc" = "sociable",
                        "Mea" = "mean",
                        "Con" = "confident",
                        "Att" = "attractive"
  )) 
  
write_csv(ratings_raw, "data/ratings_raw_subset.csv")

1.3 Load Auxiliary Data

Data on regions and stimuli.

1.3.1 Load Region Data

regions <- read_csv("data/regions.csv")

1.3.2 Load Stimulus Info

# Stimulus info for the face images used in the study; expand the
# single-letter Race/Gender codes to readable labels.
stim_info <- read_csv("data/psa_cfd_faces.csv") %>%
  mutate(ethnicity = recode(Race, "A" = "asian", "B" = "black", "L" = "latinx", "W" = "white"),
         gender = recode(Gender, "M" = "male", "F" = "female")
  )

# Stimulus counts and age summaries by ethnicity and gender
stim_info %>%
  group_by(ethnicity, gender) %>%
  summarise(
    n = n(),
    mean_age = round(mean(Age), 2),
    sd_age = round(sd(Age), 2)
  ) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
ethnicity gender n mean_age sd_age
asian female 15 26.15 3.33
asian male 15 26.40 3.21
black female 15 27.00 3.51
black male 15 28.07 4.27
latinx female 15 25.27 2.42
latinx male 15 26.31 4.00
white female 15 25.77 3.03
white male 15 26.06 4.46
# Summary statistics for the stimulus set, reported inline in the text below
stim_ages <- stim_info$Age
stim_n_male <- sum(stim_info$gender == "male")
stim_n_female <- sum(stim_info$gender == "female")
mean_age <- round(mean(stim_ages), 2)
sd_age <- round(sd(stim_ages), 2)
age_range <- range(stim_ages)
min_age <- age_range[1]
max_age <- age_range[2]

Stimuli in our study will be an open-access, full-color, face image set consisting of 60 men and 60 women (mean age=26.38 years, SD=3.57 years, range=18.73 to 34.93 years), taken under standardized photographic conditions (Ma et al., 2015).

1.3.3 Load O&T 2008 Data

# read original OT data and get into same format as data_agg will be

# read original OT data and get into same format as data_agg will be

# the 13 trait names present in the ratings (excluding "old"), alphabetical
traits <- ratings_raw %>%
  filter(trait != "old", !is.na(trait)) %>%
  arrange(trait) %>%
  pull(trait) %>%
  unique()
  
ot_data <- readxl::read_excel("data/Karolinska_14trait_judgmentswithlabels.xls") %>%
  mutate(region = "(Oosterhof & Todorov, 2008)") %>%
  rename(stim_id = `Todorov Label`,
         emostable = `emotionally stable`) %>%
  # NOTE: `traits` is an external character vector used bare in select();
  # this relies on tidyselect matching the column names it contains
  select(region, stim_id, traits)

1.4 Data Processing

1.4.1 Join Data

# Attach world regions to the raw ratings and drop the "old" trait (not part
# of the 13-trait model). The questionnaire country is renamed first so it
# does not clash with the country parsed out of the COUNTRY_NNN lab id.
ratings <- filter(
  left_join(
    separate(
      rename(ratings_raw, qcountry = country),
      lab, c("country", "lab")
    ),
    regions,
    by = "country"
  ),
  trait != "old"
)

1.4.2 Graph distributions for trait by region

# plot styles
bgcolor <- "white"
textcolor <- "black"
# shared ggplot theme applied to every figure in this document
PSA_theme <- theme(
    plot.background = element_rect(fill = bgcolor, color = NA),
    panel.background = element_rect(fill = NA, color = "grey"),
    legend.background = element_rect(fill = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(color = textcolor, size=15),
    axis.text = element_text(color = textcolor, size=10),
    strip.text.y = element_text(angle = 0, hjust = 0)
  )
# rating distributions (1-9 scale) for each trait within each world region
ggplot(ratings, aes(rating, fill = trait)) +
  geom_histogram(binwidth = 1, color = "grey", show.legend = F) +
  facet_grid(region~trait, space = "free") +
  scale_x_continuous(breaks = 1:9) +
  PSA_theme

1.5 Data checks

# One row per participant x trait, with the number of trials completed and
# the number of distinct stimuli rated (inputs to the data checks below).
part <- ungroup(
  summarise(
    group_by(ratings, user_id, sex, age, country, language, trait, region, lab),
    trials = n(),
    stim_n = n_distinct(stim_id)
  )
)

1.5.1 How many participants completed at least one rating for each of 120 stimuli

# Count participants per region who rated all 120 stimuli at least once
mutate(part, n120 = ifelse(stim_n == 120, "rated all 120", "rated < 120")) %>%
  count(region, n120) %>%
  spread(n120, n) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region rated < 120 rated all 120
Africa 4 186
Asia 19 264
Australia & New Zealand 11 371
Central America & Mexico 10 165
Eastern Europe 40 284
Middle East 6 146
Scandinavia 13 229
South America 9 454
UK 2 121
USA & Canada 22 1182
Western Europe 14 659

1.5.2 Participants who did not complete exactly 240 trials

# Classify participants by trials completed (240 expected: 120 stimuli x 2
# blocks). case_when() evaluates in order, so "rated 120-239" only catches
# rows not already matched by the earlier branches.
part %>% 
  mutate(n240 = case_when(
    trials == 240 ~ "rated 240", 
    trials > 240 ~ "rated > 240",
    trials < 120 ~ "rated < 120",
    trials < 240 ~ "rated 120-239"
  )) %>%
  count(region, n240) %>%
  spread(n240, n, fill = 0) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region rated < 120 rated > 240 rated 120-239 rated 240
Africa 3 1 35 151
Asia 18 3 65 197
Australia & New Zealand 10 2 76 294
Central America & Mexico 10 0 64 101
Eastern Europe 40 0 69 215
Middle East 6 1 21 124
Scandinavia 13 0 57 172
South America 9 2 87 365
UK 2 0 11 110
USA & Canada 22 0 120 1062
Western Europe 12 0 50 611

1.5.3 Participants with low-variance responses in block 1

identical_rating_threshold <- 0.75 * 120 # use this for registered analyses

# Identify low-variance responders in block 1: participants whose single
# most common rating was given on at least 75% of the 120 block-1 trials.
inv_participants <- ratings %>%
  filter(block == 1) %>%
  count(user_id, region, trait, rating) %>%
  group_by(user_id, region, trait) %>%
  filter(n == max(n)) %>% # find most common rating for each P
  ungroup() %>%
  filter(n >= identical_rating_threshold) # select Ps who gave the same rating to >= 75% of stimuli

# Tabulate low-variance participants by trait (rows) and region (columns),
# with row and column totals.
inv <- inv_participants %>%
  count(region, trait) %>%
  spread(region, n, fill = 0) %>%
  mutate(TOTAL = rowSums(select_if(., is.numeric), na.rm = TRUE))

# column totals (the original piped `inv` through a no-op zero-variable
# group_by(); summarise over the ungrouped table is equivalent and clearer)
inv_total <- inv %>%
  summarise_if(is.numeric, sum, na.rm = TRUE) %>%
  mutate(trait = "TOTAL")
 
bind_rows(inv, inv_total) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
trait Africa Asia Australia & New Zealand Central America & Mexico Eastern Europe Middle East Scandinavia South America UK USA & Canada Western Europe TOTAL
aggressive 2 1 5 0 0 1 1 1 1 11 4 27
attractive 1 1 1 0 0 1 1 1 0 6 3 15
caring 1 0 0 0 0 0 0 1 0 6 1 9
confident 2 0 0 1 0 1 1 2 0 0 0 7
dominant 2 1 0 0 1 0 1 0 0 5 0 10
emostable 1 2 3 1 1 0 0 1 0 4 3 16
intelligent 0 1 3 0 2 0 2 1 0 6 2 17
mean 0 0 2 1 1 1 2 2 0 12 5 26
responsible 0 0 2 0 1 0 0 2 2 7 1 15
sociable 0 1 0 1 0 0 0 2 0 5 1 10
trustworthy 0 3 0 1 4 1 1 1 1 4 8 24
unhappy 0 1 0 0 1 0 1 1 0 2 2 8
weird 2 2 3 1 2 0 1 6 0 8 4 29
TOTAL 11 13 19 6 13 5 11 21 4 76 34 213

1.5.4 Participants with no region

# Sanity check: participants whose lab country has no region mapping
# (expected to be empty)
select(filter(part, is.na(region)), user_id, country, lab)
## # A tibble: 0 x 3
## # … with 3 variables: user_id <dbl>, country <chr>, lab <chr>

1.5.5 Remove excluded data and average ratings

# Apply the registered exclusions, then average each participant's two
# ratings (one per block) of each stimulus.
data <- ratings %>%
  group_by(user_id, trait) %>%
  filter(
    # did not complete 1+ ratings for each of 120 stimuli
    dplyr::n_distinct(stim_id) == 120,      
    !is.na(region)   # did not specify region (none expected)
  ) %>%
  anti_join(inv_participants, by = "user_id") %>% # exclude Ps with low variance
  ungroup() %>%
  group_by(user_id, age, sex, ethnicity, language, lab, country, region, trait, stim_id) %>%
  summarise(rating = mean(rating)) %>% # average ratings across 2 blocks
  ungroup()

write_csv(data, "data/psa001_ind_subset.csv")

1.6 Participant Demographics

# Participant counts per questionnaire language
# (reduce to one row per participant, then count by language)
data %>%
  distinct(user_id, language) %>%
  count(language) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
language n
EL 33
ENG 1954
ES-PE 39
FAS 17
FR-BE 28
FR-CH 38
FRE 76
GER 150
HU 62
ITA 52
NL 79
NOR 113
POL 12
PT 26
PT-BR 65
RO 9
RU 77
SLO 88
SPA 574
SRP 23
SV 62
THA 31
TUR 93
ZH-CN 33
ZH-S 117
# Participant counts by world region, with a TOTAL row appended; saved as an
# HTML table and then printed.
by_region <- data %>%
  group_by(user_id, region) %>%
  summarise() %>%
  ungroup() %>%
  group_by(region) %>%
  summarise(n = n()) %>%
  add_row(region = "TOTAL", n = n_distinct(data$user_id)) %>%
  knitr::kable("html") %>%
  kable_styling("striped")

save_kable(by_region, "figures/n_by_region.html")
           
by_region
region n
Africa 176
Asia 251
Australia & New Zealand 352
Central America & Mexico 160
Eastern Europe 271
Middle East 141
Scandinavia 219
South America 433
UK 117
USA & Canada 1106
Western Europe 625
TOTAL 3851

1.6.1 Age and sex distribution per region

# Histogram of participant age (5-year bins) by sex, one panel per region;
# the text label in each panel shows the per-region participant count.
data %>%
  group_by(user_id, sex, age, region) %>%
  summarise() %>%
  ungroup() %>%
  group_by(region) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  ggplot(aes(as.numeric(age), fill = sex)) +
  geom_histogram(binwidth = 5, color = "grey") +
  geom_text(aes(x=0, y=5, label = paste0("n=",n)), color = "black") +
  labs(title="", y="", x="Participant age in 5-year bins") +
  facet_grid(region~., scales="free_y") +
  PSA_theme

1.6.2 Participants per trait per region

# Bar chart of participants per trait within each region; the horizontal
# line marks 15 participants per trait.
data %>%
  group_by(trait, region) %>%
  summarise(n = n_distinct(user_id)) %>%
  ggplot(aes(trait, n)) +
  geom_col(aes(fill = trait), show.legend = F) +
  geom_hline(yintercept = 15) +
  facet_grid(region~., scale = "free") +
  labs(title="", x="", y="Participants per trait per region") +
  theme( axis.text.x = element_text(angle = -45, hjust = 0) ) + 
  PSA_theme

ggsave("figures/participants_per_trait_per_region.png", width = 15, height = 8)

1.6.3 Participants per lab

# Post-exclusion participant counts per lab within each region
labs <- data %>%
  unite(lab, country, lab) %>%
  distinct(region, lab, user_id) %>%
  count(region, lab) %>%
  arrange(region, lab)

write_csv(labs, "data/labs_post_exclusions.csv")

knitr::kable(labs) %>%
  kable_styling("striped")
region lab n
Africa KEN_001 54
Africa KEN_002 61
Africa NGA_001 16
Africa RSA_001 17
Africa RSA_002 28
Asia CHN_001 17
Asia CHN_006 33
Asia CHN_007 19
Asia CHN_014 30
Asia MAS_001 16
Asia MAS_002 18
Asia MAS_003 15
Asia MAS_004 39
Asia TAI_001 10
Asia TAI_002 23
Asia THA_001 31
Australia & New Zealand AUS_004 29
Australia & New Zealand AUS_005 28
Australia & New Zealand AUS_006 70
Australia & New Zealand AUS_007 87
Australia & New Zealand AUS_008 29
Australia & New Zealand AUS_011 28
Australia & New Zealand AUS_014 29
Australia & New Zealand NZL_001 7
Australia & New Zealand NZL_002 45
Central America & Mexico ECU_001 39
Central America & Mexico MEX_002 67
Central America & Mexico MEX_003 23
Central America & Mexico SLV_001 31
Eastern Europe HUN_001 62
Eastern Europe POL_001 12
Eastern Europe ROU_001 9
Eastern Europe RUS_005 77
Eastern Europe SRB_002 23
Eastern Europe SVK_001 27
Eastern Europe SVK_002 61
Middle East IRI_001 17
Middle East TUR_001 22
Middle East TUR_003 35
Middle East TUR_007 4
Middle East TUR_009 36
Middle East UAE_001 27
Scandinavia DNK_001 32
Scandinavia FIN_001 14
Scandinavia NOR_001 46
Scandinavia NOR_002 43
Scandinavia NOR_003 21
Scandinavia NOR_004 15
Scandinavia SWE_004 19
Scandinavia SWE_005 11
Scandinavia SWE_006 18
South America ARG_001 31
South America BRA_001 36
South America BRA_003 29
South America CHI_001 60
South America CHI_003 29
South America CHI_004 27
South America CHI_005 32
South America COL_003 15
South America COL_004 135
South America PER_001 21
South America PER_002 18
UK UK_001 10
UK UK_005 33
UK UK_006 13
UK UK_011 21
UK UK_018 14
UK UK_022 10
UK UK_024 16
USA & Canada CAN_001 14
USA & Canada CAN_008 22
USA & Canada CAN_015 27
USA & Canada CAN_017 42
USA & Canada CAN_018 104
USA & Canada PSA_001 28
USA & Canada PSA_002 105
USA & Canada USA_001 30
USA & Canada USA_003 28
USA & Canada USA_005 15
USA & Canada USA_011 12
USA & Canada USA_014 24
USA & Canada USA_020 64
USA & Canada USA_025 32
USA & Canada USA_026 25
USA & Canada USA_030 34
USA & Canada USA_031 28
USA & Canada USA_033 16
USA & Canada USA_036 15
USA & Canada USA_038 24
USA & Canada USA_039 42
USA & Canada USA_042 9
USA & Canada USA_050 55
USA & Canada USA_051 51
USA & Canada USA_054 31
USA & Canada USA_055 39
USA & Canada USA_065 14
USA & Canada USA_067 7
USA & Canada USA_075 41
USA & Canada USA_083 6
USA & Canada USA_113 38
USA & Canada USA_114 32
USA & Canada USA_115 17
USA & Canada USA_116 17
USA & Canada USA_117 18
Western Europe AUT_001 31
Western Europe AUT_002 29
Western Europe AUT_005 17
Western Europe BEL_001 28
Western Europe ESP_001 35
Western Europe ESP_005 39
Western Europe ESP_006 11
Western Europe FRA_003 27
Western Europe FRA_004 16
Western Europe FRA_005 12
Western Europe FRA_006 21
Western Europe GER_011 18
Western Europe GER_012 22
Western Europe GER_015 33
Western Europe GER_017 18
Western Europe GRE_002 33
Western Europe ITA_001 30
Western Europe ITA_003 22
Western Europe NED_008 40
Western Europe NED_009 79
Western Europe POR_001 26
Western Europe SUI_003 38

2 Analyses

2.1 Main Analysis

First, we will calculate the average rating for each face separately for each of the 13 traits. Like Oosterhof and Todorov (2008), we will then subject these mean ratings to principal component analysis with orthogonal components and no rotation. Using the criteria reported in Oosterhof and Todorov’s (2008) paper, we will retain and interpret the components with an Eigenvalue > 1.

2.1.1 Calculate Alphas

# takes a long time, so saves the results and loads from a file in the next chunk if set to eval = FALSE
data_alpha <- data %>%
  select(user_id, region, stim_id, rating, trait) %>%
  spread(stim_id, rating, sep = "_") %>%
  group_by(trait, region) %>%
  nest() %>%
  mutate(alpha = map(data, function(d) {
    if (dim(d)[1] > 2) {
      # calculate cronbach's alpha
      subdata <- d %>%
        as_tibble() %>%
        select(-user_id) %>%
        t()

      capture.output(suppressWarnings(a <- psych::alpha(subdata)))
      a$total["std.alpha"] %>% pluck(1) %>% round(3)
    } else {
      NA
    }
  })) %>%
  select(-data) %>%
  unnest(alpha) %>%
  ungroup()

saveRDS(data_alpha, file = "data/alphas.RDS")
data_alpha <- readRDS("data/alphas.RDS")

# Join per-region/trait participant counts to the alphas and prepare factors
# for plotting (region levels reversed so panels read top-to-bottom).
n_alpha <- data %>%
  select(user_id, region, trait) %>%
  distinct() %>%
  count(region, trait) %>%
  left_join(data_alpha, by = c("region", "trait")) %>%
  mutate(
    trait = as.factor(trait),
    region = str_replace(region, " (and|&) ", " &\n"),
    region = as.factor(region),
    region = factor(region, levels = rev(levels(region)))
  )

# HTML table of alpha and n for each region x trait
n_alpha %>%
  mutate(stat = paste("α =", alpha, "<br>n =", n)) %>%
  select(Region = region, stat, trait) %>%
  spread(trait, stat) %>%
  knitr::kable("html", escape = FALSE) %>%
  column_spec(2:14, width = "7%") %>%
  kable_styling("striped", font_size = 9) %>%
  save_kable("figures/alpha.html")
# Heatmap of alpha by trait and region; cells with alpha >= .7 are filled red
# (second value of the manual fill scale), others white
ggplot(n_alpha) +
  geom_tile(aes(trait, region, fill=alpha >=.7), 
           color = "grey20", show.legend = F) +
  geom_text(aes(trait, region, label=sprintf("α = %0.2f\nn = %.0f", alpha, n)), color = "black", size = 5) +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") +
  labs(x="", y="", title="") +
  scale_fill_manual(values = c("white", "red")) +
  PSA_theme

ggsave("figures/alphas.png", width = 18, height = 10)

2.1.2 Calculate Aggregate Scores

# Per-region mean rating for every stimulus x trait, then reshaped wide
# (one column per trait)
agg_long <- data %>%
  group_by(region, trait, stim_id) %>%
  summarise(rating = mean(rating)) %>%
  ungroup()
data_agg <- spread(agg_long, trait, rating)

write_csv(data_agg, "data/psa001_agg_subset.csv")
# density of aggregate scores per trait and region
data_agg %>%
  gather("trait", "rating", aggressive:weird) %>%
  ggplot(aes(rating, fill = trait)) +
  geom_density(show.legend = F) +
  facet_grid(region ~ trait) +
  PSA_theme

ggsave("figures/agg_scores.png", width = 15, height = 8)

2.1.3 Principal Component Analysis (PCA)

The number of components to extract was determined using eigenvalues > 1 for each world region. PCA was conducted using the psych::principal() function with rotate="none".

# function to calculate PCA

# function to calculate PCA

# SPSS-style PCA for one region's aggregated ratings (one row per stimulus,
# one column per trait). Returns a long data frame with columns
# rowname (trait or statistic name), type ("trait"/"stat"), pc, loading.
psa_pca <- function(d) {
  traits <- select(d, -stim_id) %>% 
    select_if(colSums(!is.na(.)) > 0) # omits missing traits
  
  # principal components analysis (SPSS-style, following Oosterhof & Todorov)
  # retain components with eigenvalue > 1
  ev <- eigen(cor(traits))$values
  nfactors <- sum(ev > 1)
  
  pca <- principal(
    traits, 
    nfactors=nfactors, 
    rotate="none"
  )
  
  # variance-accounted rows (SS loadings, Proportion Var, ...)
  stats <- pca$Vaccounted %>% 
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "stat")
  
  # trait loadings stacked with the stats, gathered to long format;
  # columns 2:(ncol-1) are the PC columns (col 1 = rowname, last = type)
  unclass(pca$loadings) %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "trait") %>%
    bind_rows(stats) %>%
    gather("pc", "loading", 2:(ncol(.)-1))
}
# Run the PCA separately for each region, with the original Oosterhof &
# Todorov (2008) data appended as its own "region" for comparison.
pca_analyses <- data_agg %>%
  bind_rows(ot_data) %>%
  group_by(region) %>%
  nest() %>%
  mutate(pca = map(data, psa_pca)) %>%
  select(-data) %>%
  unnest(pca) %>%
  ungroup() %>%
  mutate(pc = str_replace(pc, "PC", "Component "))

2.1.3.1 Number of Components (and proportion variance) by region

# Table of the number of retained components per region and the proportion
# of variance explained by each component
pca_analyses %>%
  filter(rowname == "Proportion Var") %>%
  group_by(region) %>%
  mutate(nPCs = n()) %>%
  ungroup() %>%
  spread(pc, loading) %>%
  select(-rowname, -type) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region nPCs Component 1 Component 2 Component 3
(Oosterhof & Todorov, 2008) 2 0.633 0.183
Africa 2 0.423 0.126
Asia 3 0.581 0.168 0.084
Australia & New Zealand 3 0.580 0.167 0.094
Central America & Mexico 3 0.452 0.191 0.079
Eastern Europe 3 0.568 0.176 0.083
Middle East 3 0.428 0.230 0.090
Scandinavia 3 0.576 0.167 0.079
South America 2 0.535 0.219
UK 3 0.499 0.163 0.110
USA & Canada 3 0.634 0.182 0.084
Western Europe 2 0.636 0.180

2.1.3.2 Trait Loadings by Region and Component

# order traits by P1 loading if loads positively on P1, or by -P2 loading otherwise
trait_order <- pca_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  spread(pc, loading) %>% 
  arrange(ifelse(`Component 1`>0,`Component 1`,-`Component 2`)) %>% 
  pull(rowname)

pca_prop_var <- pca_analyses %>%
  filter(rowname == "Proportion Var") %>%
  select(-rowname, -type) %>%
  mutate(loading = round(loading, 2))

# Heatmap of trait loadings per component within each region; the extra row
# at y = 14 displays each component's proportion of variance.
pca_analyses %>%
  filter(type == "trait") %>%
  select(-type) %>%
  mutate(
    trait = as.factor(rowname),
    trait = factor(trait, levels = c(trait_order, "Prop.Var")),
    loading = round(loading, 2)
  ) %>%
  ggplot() +
  geom_tile(aes(pc, trait, fill=loading), show.legend = F) +
  geom_text(aes(pc, trait, label=sprintf("%0.2f", loading)), color = "black") +
  geom_text(data = pca_prop_var, aes(pc, y = 14, label=sprintf("%0.2f", loading)), color = "black") +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") + 
  scale_fill_gradient2(low = "dodgerblue", mid = "grey90", high = "#FF3333", limits=c(-1.1, 1.1)) +
  facet_wrap(~region, scales = "fixed", ncol = 4) +
  labs(x = "", y = "", title="") +
  PSA_theme

ggsave("figures/PCA_loadings.png", width = 15, height = 10)

2.1.3.3 Replication Criteria (PCA)

Oosterhof and Todorov’s valence-dominance model will be judged to have been replicated in a given world region if the first two components both have Eigenvalues > 1, the first component (i.e., the one explaining more of the variance in ratings) is correlated strongly (loading > .7) with trustworthiness and weakly (loading < .5) with dominance, and the second component (i.e., the one explaining less of the variance in ratings) is correlated strongly (loading > .7) with dominance and weakly (loading < .5) with trustworthiness. All three criteria need to be met to conclude that the model was replicated in a given world region.

# Apply the registered replication criteria per region:
# Component 1 must load > .7 on trustworthy and < .5 on dominant, AND
# Component 2 must load > .7 on dominant and < .5 on trustworthy.
pca_rep <- pca_analyses %>%
  filter(
    type == "trait", 
    rowname %in% c("trustworthy", "dominant"),
    pc %in% c("Component 1", "Component 2")
  ) %>%
  select(-type) %>%
  mutate(rowname = paste(pc, rowname)) %>%
  select(-pc) %>%
  spread(rowname, loading) %>%
  rename(Region = region) %>%
  mutate(Replicated = ifelse(
    `Component 1 dominant` < .5 & `Component 1 trustworthy` > .7 & 
    `Component 2 dominant` > .7 & `Component 2 trustworthy` < .5,
    "Yes", "No"
  )) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html", col.names = c("Region", "Dominant", "Trustworthy", "Dominant", "Trustworthy", "Replicated")) %>%
  add_header_above(c(" " = 1, "Component 1" = 2, "Component 2" = 2, "  " = 1)) %>%
  kable_styling("striped")

save_kable(pca_rep, "figures/PCA_rep_criteria.html")

pca_rep
Component 1
Component 2
Region Dominant Trustworthy Dominant Trustworthy Replicated
(Oosterhof & Todorov, 2008) -0.244 0.941 0.929 -0.060 Yes
Africa 0.202 0.847 0.660 -0.101 No
Asia 0.300 0.884 0.855 -0.027 Yes
Australia & New Zealand 0.172 0.921 0.903 -0.092 Yes
Central America & Mexico 0.111 0.808 0.871 -0.071 Yes
Eastern Europe 0.437 0.842 0.788 -0.117 Yes
Middle East 0.335 0.695 0.749 -0.323 No
Scandinavia 0.426 0.937 0.836 -0.120 Yes
South America 0.150 0.866 0.940 -0.247 Yes
UK 0.493 0.858 0.716 -0.045 Yes
USA & Canada 0.382 0.952 0.826 -0.096 Yes
Western Europe 0.379 0.930 0.866 -0.192 Yes

2.1.4 Factor Congruence (PCA)

This analysis determines the congruence between the components from Oosterhof & Todorov (2008) and the components in each world region, using the psych::factor.congruence function. Congruence is labeled “not similar” for values < 0.85, “fairly similar” for values < 0.95, and “equal” for values >= 0.95.

# get loadings for original O&T2008
ot2008_pca_loadings <- pca_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  select(-region, -type) %>%
  spread(pc, loading) %>%
  column_to_rownames()

# run factor congruence for each region  
fc_pca <- pca_analyses %>%
  filter(type == "trait", region != "(Oosterhof & Todorov, 2008)") %>%
  select(-type) %>%
  spread(pc, loading) %>%
  group_by(region) %>%
  nest() %>%
  mutate(fc = map(data, function(d) {
    loadings <- d %>%
      as.data.frame() %>%
      select(rowname, `Component 1`, `Component 2`) %>%
      arrange(rowname) %>%
      column_to_rownames()
    
    psych::factor.congruence(loadings, 
                             ot2008_pca_loadings, 
                             digits = 4) %>%
      as.data.frame() %>%
      rownames_to_column(var = "regionPC")
  })) %>%
  select(-data) %>%
  unnest(fc) %>%
  ungroup()

# Format the congruence results: keep only the diagonal (each region PC vs
# the same-numbered O&T PC) and label similarity bands (<.85 / <.95 / >=.95).
pc_fc_table <- fc_pca %>%
  gather(origPC, congruence, `Component 1`:`Component 2`) %>%
  mutate(sig = case_when(
           congruence < .85 ~ "not similar",
           congruence < .95 ~ "fairly similar",
           congruence >= .95 ~ "equal"
         ),
         congruence = sprintf("%0.3f", congruence)) %>%
  filter(regionPC == origPC) %>%
  select(region, PC = regionPC, congruence, sig) %>%
  gather(k, v, congruence, sig) %>%
  unite(PC, PC, k, remove = T) %>%
  spread(PC, v) %>%
  # NOTE(review): the "Loading" column headers display the congruence
  # coefficient, not a loading — confirm the intended header labels
  knitr::kable("html", digits = 3, align = 'lrlrl', escape = F,
               col.names = c("Region", "Loading", "Congruence", "Loading", "Congruence")) %>%
  add_header_above(c(" " = 1, "Component 1" = 2, "Component 2" = 2)) %>%
  kable_styling("striped")

save_kable(pc_fc_table, "figures/PCA_factor_congruence.html")

pc_fc_table
Component 1
Component 2
Region Loading Congruence Loading Congruence
Africa 0.964 equal 0.891 fairly similar
Asia 0.977 equal 0.836 not similar
Australia & New Zealand 0.985 equal 0.966 equal
Central America & Mexico 0.988 equal 0.915 fairly similar
Eastern Europe 0.964 equal 0.960 equal
Middle East 0.951 equal 0.836 not similar
Scandinavia 0.966 equal 0.947 fairly similar
South America 0.985 equal 0.952 equal
UK 0.948 fairly similar 0.929 fairly similar
USA & Canada 0.973 equal 0.953 equal
Western Europe 0.973 equal 0.939 fairly similar

2.2 Robustness Checks

2.2.1 Exploratory Factor Analysis (EFA)

The number of factors to extract was determined using parallel analysis (paran::paran()) for each world region. EFA was conducted using the psych::fa() function with all default options.

# function to calculate EFA

# function to calculate EFA

# EFA analogue of psa_pca(): the number of factors is chosen by parallel
# analysis, then psych::fa() is run with default options. Returns a long
# data frame with columns rowname, type ("trait"/"stat"), mr, loading.
psa_efa <- function(d) {
  traits <- select(d, -stim_id) %>% 
    select_if(colSums(!is.na(.)) > 0) # omits missing traits
  
  # Parallel Analysis with Dino's 'paran' package. 
  nfactors <- paran(traits, iterations = 5000, 
          centile = 0, quietly = TRUE, 
          status = FALSE, all = TRUE, 
          cfa = TRUE, graph = FALSE)
  
  efa <- psych::fa(traits, nfactors$Retained) 
  
  # variance-accounted rows (SS loadings, Proportion Var, ...)
  stats <- efa$Vaccounted %>% 
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "stat")
  
  # loadings + stats in long format; columns 2:(ncol-1) are the factors
  unclass(efa$loadings) %>%
    as.data.frame() %>%
    rownames_to_column() %>%
    mutate(type = "trait") %>%
    bind_rows(stats) %>%
    gather("mr", "loading", 2:(ncol(.)-1))
}

Calculate for each region

# Run the EFA separately for each region, with the O&T 2008 data appended
# as its own "region" for comparison.
efa_analyses <- data_agg %>%
  bind_rows(ot_data) %>%
  group_by(region) %>%
  nest() %>%
  mutate(efa = map(data, psa_efa)) %>%
  select(-data) %>%
  unnest(efa) %>%
  ungroup() %>%
  mutate(mr = str_replace(mr, "MR", "Factor "))

2.2.1.1 Number of Factors (and proportion variance) by region

Note: many of the analyses will produce warnings in the subset version.

# Table of the number of retained factors per region and the proportion of
# variance explained by each factor
efa_analyses %>%
  filter(rowname == "Proportion Var") %>%
  group_by(region) %>%
  mutate(nMRs = n()) %>%
  ungroup() %>%
  spread(mr, loading) %>%
  select(-rowname, -type) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html") %>%
  kable_styling("striped")
region nMRs Factor 1 Factor 2 Factor 3 Factor 4 Factor 5
(Oosterhof & Todorov, 2008) 2 0.564 0.227
Africa 4 0.210 0.097 0.128 0.129
Asia 3 0.363 0.136 0.288
Australia & New Zealand 4 0.267 0.132 0.181 0.253
Central America & Mexico 3 0.276 0.167 0.200
Eastern Europe 3 0.424 0.189 0.164
Middle East 3 0.238 0.187 0.251
Scandinavia 4 0.314 0.147 0.187 0.167
South America 5 0.234 0.207 0.185 0.056 0.17
UK 4 0.268 0.137 0.190 0.155
USA & Canada 4 0.231 0.233 0.222 0.215
Western Europe 4 0.258 0.161 0.238 0.246

2.2.1.2 Trait Loadings by Region and Factor

# proportion of variance per factor, shown along the top of the heatmap
efa_prop_var <- efa_analyses %>%
  filter(rowname == "Proportion Var") %>%
  select(-rowname, -type) %>%
  mutate(loading = round(loading, 2))

# Heatmap of trait loadings per factor within each region (EFA version);
# the extra row at y = 14 displays each factor's proportion of variance.
efa_analyses %>%
  filter(type == "trait") %>%
  select(-type) %>%
  mutate(
    trait = as.factor(rowname),
    trait = factor(trait, levels = c(trait_order, "Prop.Var")),
    loading = round(loading, 2)
  ) %>%
  ggplot() +
  geom_tile(aes(mr, trait, fill=loading), show.legend = F) +
  geom_text(aes(mr, trait, label=sprintf("%0.2f", loading)), color = "black") +
  geom_text(data = efa_prop_var, aes(mr, y = 14, label=sprintf("%0.2f", loading)), color = "black") +
  scale_y_discrete(drop=FALSE) +
  scale_x_discrete(position = "top") + 
  scale_fill_gradient2(low = "dodgerblue", mid = "grey90", high = "#FF3333", limits=c(-1.1, 1.1)) +
  facet_wrap(~region, scales = "fixed", ncol = 4) +
  labs(x = "", y = "", title="") +
  PSA_theme

ggsave("figures/EFA_loadings.png", width = 15, height = 10)

2.2.1.3 Replication Criteria (EFA)

Oosterhof and Todorov’s valence-dominance model will be judged to have been replicated in a given world region if the first factor is correlated strongly (loading > .7) with trustworthiness and weakly (loading < .5) with dominance, and the second factor is correlated strongly (loading > .7) with dominance and weakly (loading < .5) with trustworthiness. All these criteria need to be met to conclude that the model was replicated in a given world region.

# Apply the replication criteria to the EFA solution per region:
# Factor 1 must load > .7 on trustworthy and < .5 on dominant, AND
# Factor 2 must load > .7 on dominant and < .5 on trustworthy.
efa_rep <- efa_analyses %>%
  filter(
    type == "trait", 
    rowname %in% c("trustworthy", "dominant"),
    mr %in% c("Factor 1", "Factor 2")
  ) %>%
  select(-type) %>%
  mutate(rowname = paste(mr, rowname)) %>%
  select(-mr) %>%
  spread(rowname, loading) %>%
  rename(Region = region) %>%
  mutate(Replicated = ifelse(
    `Factor 1 dominant` < .5 & `Factor 1 trustworthy` > .7 & 
    `Factor 2 dominant` > .7 & `Factor 2 trustworthy` < .5,
    "Yes", "No"
  )) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable("html", col.names = c("Region", "Dominant", "Trustworthy", "Dominant", "Trustworthy", "Replicated")) %>%
  add_header_above(c(" " = 1, "Factor 1" = 2, "Factor 2" = 2, "  " = 1)) %>%
  kable_styling("striped")

save_kable(efa_rep, "figures/EFA_rep_criteria.html")

efa_rep
Factor 1
Factor 2
Region Dominant Trustworthy Dominant Trustworthy Replicated
(Oosterhof & Todorov, 2008) 0.228 0.826 0.970 -0.288 Yes
Africa -0.056 0.575 0.554 0.157 No
Asia 0.331 0.679 0.809 -0.133 No
Australia & New Zealand 0.043 0.487 0.898 0.057 No
Central America & Mexico 0.200 0.560 0.803 -0.181 No
Eastern Europe 0.520 0.821 0.631 -0.271 No
Middle East 0.170 0.094 0.545 -0.624 No
Scandinavia 0.165 0.700 0.870 -0.070 No
South America 0.160 0.135 0.935 -0.285 No
UK 0.211 0.706 0.670 -0.166 No
USA & Canada 0.444 0.320 0.662 -0.425 No
Western Europe 0.226 0.651 0.866 -0.138 No

2.2.2 Factor Congruence (EFA)

This analysis determines the congruence between the factors from Oosterhof & Todorov (2008) and the factors in each world region, using the psych::factor.congruence function. Congruence is labeled “not similar” for values < 0.85, “fairly similar” for values < 0.95, and “equal” for values >= 0.95.

# get loadings for original O&T2008
# Loadings for the original O&T 2008 EFA solution, traits as row names
ot2008_efa_loadings <- efa_analyses %>%
  filter(region == "(Oosterhof & Todorov, 2008)", type == "trait") %>%
  select(-region, -type) %>%
  spread(mr, loading) %>%
  column_to_rownames()

# Factor congruence between each region's first two factors and the
# O&T 2008 factors
fc_efa <- efa_analyses %>%
  filter(type == "trait", region != "(Oosterhof & Todorov, 2008)") %>%
  select(-type) %>%
  spread(mr, loading) %>%
  group_by(region) %>%
  nest() %>%
  mutate(fc = map(data, function(d) {
    # traits sorted alphabetically so rows align with the O&T loadings
    loadings <- d %>%
      as.data.frame() %>%
      select(rowname, `Factor 1`, `Factor 2`) %>%
      arrange(rowname) %>%
      column_to_rownames()

    psych::factor.congruence(loadings,
                             ot2008_efa_loadings,
                             digits = 4) %>%
      as.data.frame() %>%
      rownames_to_column(var = "regionMR")
  })) %>%
  select(-data) %>%
  unnest(fc) %>%
  ungroup()

# Format the EFA congruence results: keep only the diagonal (each region
# factor vs the same-numbered O&T factor) and label similarity bands
# (<.85 / <.95 / >=.95).
mr_fc_table <- fc_efa %>%
  gather(origMR, congruence, `Factor 1`:`Factor 2`) %>%
  mutate(sig = case_when(
           congruence < .85 ~ "not similar",
           congruence < .95 ~ "fairly similar",
           congruence >= .95 ~ "equal"
         ),
         congruence = sprintf("%0.3f", congruence)) %>%
  filter(regionMR == origMR) %>%
  select(region, MR = regionMR, congruence, sig) %>%
  gather(k, v, congruence, sig) %>%
  unite(MR, MR, k, remove = T) %>%
  spread(MR, v) %>%
  # NOTE(review): the "Loading" column headers display the congruence
  # coefficient, not a loading — confirm the intended header labels
  knitr::kable("html", digits = 3, align = 'lrlrl', 
               col.names = c("Region", "Loading", "Congruence", "Loading", "Congruence")) %>%
  add_header_above(c(" " = 1, "Factor 1" = 2, "Factor 2" = 2)) %>%
  kable_styling("striped")


save_kable(mr_fc_table, "figures/EFA_factor_congruence.html")

mr_fc_table
Factor 1
Factor 2
Region Loading Congruence Loading Congruence
Africa 0.579 not similar 0.501 not similar
Asia 0.858 fairly similar 0.933 fairly similar
Australia & New Zealand 0.813 not similar 0.784 not similar
Central America & Mexico 0.841 not similar 0.936 fairly similar
Eastern Europe 0.906 fairly similar 0.965 equal
Middle East 0.726 not similar 0.879 fairly similar
Scandinavia 0.830 not similar 0.915 fairly similar
South America 0.713 not similar 0.972 equal
UK 0.781 not similar 0.962 equal
USA & Canada 0.751 not similar 0.955 equal
Western Europe 0.757 not similar 0.928 fairly similar